* Using PLANT CENTROIDS;
* Do proc ginside for each state and year for a given buffer around a plant; 
* get total turbine capacity within the buffer; 

* loop over year and state, append data, for each buffer;
* 100 mile buffer; 

* [] brackets indicate redacted internal census variable name or directory that can't be disclosed. ;

* define libraries;
libname icf '[directory location of geocoded worker residence files]';
libname hannah  '/projects/users/########';
libname ben '/projects/users/########';

/*
STEPS: 
- Get piks and lat/lon for workers in year/state from icf_us_addresses 
- PROC GINSIDE to flag inside the buffer
- DATA step to drop workers outside of buffer
- get total wind capacity in the buffer BY LOOPING THROUGH COUNTIES
- append to previous state/year
ONCE all are appended: 
- then merge with EHF workers - ehf_icf_states, 
      merge the wind capacity to worker dataset, create dummy for in/out of buffer,
      using SQL LEFT JOIN from hannah.ehf_icf_states to this dataset.
*/

* define macro "statecounty_year_100p" which does 100-mile buffer for certain years, states by county;
%macro statecounty_year_100p;
  %let stfip = 01 04 05 06 08 10 11 17 18 19 20 23 24 29 30 31 32 35 38 40 47 51 56;
  %local I next_fip;
  %let I = 1;
  %do %while (%scan(&stfip, &I) ne );
    %let next_fip = %scan(&stfip, &I);
      %do yr = 2000 %to 2015;
	*GET WORKER PIKS FOR A STATE/YEAR;
	*Create temp file of all piks residences in current state and year;
	DATA work.icf_&next_fip._&yr;
	  SET icf.icf_us_addresses (KEEP = [county id] pik [year] [latitude] [longitude]);
	  IF SUBSTR([county id],1,2) = "&next_fip" AND [year] = &yr;
	  y = [latitude]/1000000;
	  x = [longitude]/1000000;
	  DROP [latitude] [longitude];
	RUN;

	*Flag observations inside the buffer;
	PROC GINSIDE 
	  DATA	= work.icf_&next_fip._&yr
	  MAP 	= hannah.USmile100p
	  OUT	= hannah.in_100m_&next_fip._&yr
	  INCLUDEBORDER;
	  ID _ID_;
	RUN;
	data hannah.in_100m_&next_fip._&yr;
	set hannah.in_100m_&next_fip._&yr (keep = x y _ID_ pik [year] [county id]);
	rename _ID_ = id100;
	run;
	PROC GINSIDE 
	  DATA	= hannah.in_100m_&next_fip._&yr
	  MAP 	= hannah.USmile80p
	  OUT	= hannah.in_100m_&next_fip._&yr
	  INCLUDEBORDER;
	  ID _ID_;
	RUN;
	data hannah.in_100m_&next_fip._&yr;
	set hannah.in_100m_&next_fip._&yr (keep = x y _ID_ id100 pik [year] [county id]);
	rename _ID_ = id80;
	run;
	PROC GINSIDE 
	  DATA	= hannah.in_100m_&next_fip._&yr
	  MAP 	= hannah.USmile60p
	  OUT	= hannah.in_100m_&next_fip._&yr
	  INCLUDEBORDER;
	  ID _ID_;
	RUN;
	data hannah.in_100m_&next_fip._&yr;
	set hannah.in_100m_&next_fip._&yr (keep = x y _ID_ id80 id100 pik [year] [county id]);
	rename _ID_ = id60;
	run;
	PROC GINSIDE 
	  DATA	= hannah.in_100m_&next_fip._&yr
	  MAP 	= hannah.USmile40p
	  OUT	= hannah.in_100m_&next_fip._&yr
	  INCLUDEBORDER;
	  ID _ID_;
	RUN;
	data hannah.in_100m_&next_fip._&yr;
	set hannah.in_100m_&next_fip._&yr (keep = x y _ID_ id60 id80 id100 pik [year] [county id]);
	rename _ID_ = id40;
	run;
	PROC GINSIDE 
	  DATA	= hannah.in_100m_&next_fip._&yr
	  MAP 	= hannah.USmile20p
	  OUT	= hannah.in_100m_&next_fip._&yr
	  INCLUDEBORDER;
	  ID _ID_;
	RUN;
	data hannah.in_100m_&next_fip._&yr;
	set hannah.in_100m_&next_fip._&yr (keep = x y _ID_ id40 id60 id80 id100 pik [year] [county id]);
	rename _ID_ = id20;
	run;

	* Code dummy for inside/outside the buffer, rename;
	DATA hannah.in_100m_&next_fip._&yr;
	  SET hannah.in_100m_&next_fip._&yr (keep = pik [year] [county id] x y id100);
	  IF CMISS(id100) THEN DELETE;
	  statef = &next_fip;
	RUN;

	* Now bring in plant data, make separate dataset just within the buffer;
	* Subset plant data to year and state (&yr, &next_fip);
	data work.plnts_&yr;
	  set ben.plants_csm;
	  IF p_year <= &yr;
	run;

	* Now loop through counties in state &next_fip;
	%let sqlobs=0;
	%let cnties=;
	proc sql;
	  select distinct [county id] 
	    into :cnties 
	    separated by ' ' 
	    from hannah.in_100m_&next_fip._&yr where not missing([county id]);
	quit;
	%let my_count = &sqlobs;
	%if &my_count gt 0 %then %do;
	%do ct = 1 %to %sysfunc(countw(&cnties));
	  %let next_ct = %scan(&cnties,&ct,%str( ));
	    data work.in_100m_&next_fip._&yr._&next_ct;
	      set hannah.in_100m_&next_fip._&yr;
	      if [county id] = &next_ct;
	    run;
	    proc sql; 
	      CREATE TABLE work.hh_windcapsum_&next_fip._&yr._&next_ct AS
	      SELECT 
		A.*
		,E.stfips
		,E.ylat
		,E.xlong
		,E.p_year
		,E.p_cap	AS plantcap
		,E.p_tnum	AS turbnum
		,GEODIST(A.y, A.x, E.ylat, E.xlong, 'M' ) AS dist_to_turb
	      FROM work.in_100m_&next_fip._&yr._&next_ct AS a
		LEFT JOIN work.plnts_&yr AS e ON statef
	      WHERE CALCULATED dist_to_turb <= 100
		AND CALCULATED dist_to_turb ^=.
	      ORDER BY A.pik, A.[year]; 
	    quit;
	    proc sql;
	      CREATE TABLE work.hh_wcapsum_&next_fip._&yr._&next_ct AS
	      SELECT pik, [year], 
	      SUM(plantcap) as plantcap100,
	      SUM(turbnum) as turbnum100,
	      SUM(case when dist_to_turb<=80 then plantcap else 0 end) as plantcap80,
	      SUM(case when dist_to_turb<=80 then turbnum else 0 end) as turbnum80,
	      SUM(case when dist_to_turb<=60 then plantcap else 0 end) as plantcap60,
	      SUM(case when dist_to_turb<=60 then turbnum else 0 end) as turbnum60,
	      SUM(case when dist_to_turb<=40 then plantcap else 0 end) as plantcap40,
	      SUM(case when dist_to_turb<=40 then turbnum else 0 end) as turbnum40,
	      SUM(case when dist_to_turb<=20 then plantcap else 0 end) as plantcap20,
	      SUM(case when dist_to_turb<=20 then turbnum else 0 end) as turbnum20,
	      MAX([county id]) as [county id],
	      MAX(statef) as statef,
	      MAX(y) as y,
	      MAX(x) as x
	      FROM work.hh_windcapsum_&next_fip._&yr._&next_ct
	      GROUP BY pik, [year]
	      ORDER BY pik, [year];
	    quit;
	    PROC APPEND base = hannah.all_styr_100p 
	      data = work.hh_wcapsum_&next_fip._&yr._&next_ct;
	    RUN;
	    PROC DATASETS library = work nolist;
	      delete in_100m_&next_fip._&yr._&next_ct hh_windcapsum_&next_fip._&yr._&next_ct hh_wcapsum_&next_fip._&yr._&next_ct;
	    RUN; QUIT;
	  %end;
	 
	PROC DATASETS library = work nolist;
	  delete icf_&next_fip._&yr plnts_&yr ;
	RUN; QUIT;
	PROC DATASETS library = hannah nolist;
	  delete in_100m_&next_fip._&yr;
	RUN; QUIT;
      %end;
    %end;
    %let I = %eval(&I + 1);
  %end;
%mend;


* remove any old copies of appended "base" dataset before running macro;
PROC DATASETS library = hannah nolist;
  delete all_styr_100p;
RUN; QUIT;

%statecounty_year_100p;







